## Project: Presidential Negative Partisanship
## Author: Benjamin S. Noble
## Code to replicate all tables and figures

library(ggrepel)
library(ggridges)
library(fixest)
library(marginaleffects)
library(modelsummary)
library(quanteda)
library(stm)
library(kableExtra)
library(tidyverse)

options("modelsummary_format_numeric_latex" = "mathmode")

# load data ----------------------------------------------------------------- # 

# setwd('')

# load custom ggplot theme to replicate plot formatting
source('ggtheme_baselike.R')

# document level data
ref_doc_lev <- read_csv('doc_lev_df.csv') %>% 
    mutate(time3 = factor(time3, levels = c('non_compet', 'early', 'late')))
# paragraph level data
ref_df <- read_csv('para_lev_df.csv') %>% 
    mutate(time3 = factor(time3, levels = c('non_compet', 'early', 'late')))
# TAPS survey data 
taps_merge <- read_csv('taps_merge.csv')

# Table 1 ------------------------------------------------------------------- #

# create human-readable column names
pres_table <- tibble(ord = c(1:15),
    president = c('fdr', 'truman', 'eisenhower', 'kennedy', 'johnson', 'nixon', 
        'ford', 'carter', 'reagan', 'hbush','clinton','wbush', 'obama', 
        'trump', 'biden'),
    plot_pres = c('F. Roosevelt', 'Truman', 'Eisenhower', 'Kennedy', 'Johnson', 
        'Nixon', 'Ford', 'Carter', 'Reagan', 'H. Bush','Clinton','W. Bush', 
        'Obama', 'Trump', 'Biden')
)

# build aggregate statistics from document-level data
pres_ref_table_df <- ref_doc_lev %>% 
    group_by(president, div_pres) %>% 
    summarise(across(contains('ct'), sum), tot_speech = n(), tot_wc = sum(tot_wc)) %>% 
    left_join(pres_table) %>% 
    arrange(ord) %>% 
    ungroup() %>% 
    select(plot_pres, div_pres, contains('out'), tot_speech, tot_wc) %>% 
    mutate(across(out_pres_ct:all_out_ct, ~./tot_wc * 1000), kwords = round(tot_wc/1000,0)) %>% 
    select(-tot_wc)

kable(pres_ref_table_df, 'simple', escape = FALSE, booktabs = TRUE, format.args = list(big.mark = ","), digits=2)

# Table A1 ------------------------------------------------------------------ #

# hand coded sentiment accuracy 
hc_sent <- read_csv('sentiment-validation.csv') %>% 
    # merge ml labels
    left_join(ref_df %>% select(para_id, sentiment_score_f,snip_sent)) %>% 
    filter(!is.na(bin_score)) %>% 
    mutate(
        # recode continuous ml sentiment scores to three categories to match hand coding
        bin_score_mlf = case_when(sentiment_score_f < 0.33 ~ 1, sentiment_score_f >= 0.66 ~ 3, TRUE ~ 2),
        bin_score_mls = case_when(snip_sent < 0.33 ~ 1, snip_sent >= 0.66 ~ 3, TRUE ~ 2)
    )

# gpt-extracted paragraph sentiment (top table)
table(hc = hc_sent$bin_score, ml = hc_sent$bin_score_mls)
round((table(hc = hc_sent$bin_score, ml = hc_sent$bin_score_mls)[1,1] + table(hc = hc_sent$bin_score, ml = hc_sent$bin_score_mls)[2,2] + table(hc = hc_sent$bin_score, ml = hc_sent$bin_score_mls)[3,3])/sum(table(hc = hc_sent$bin_score, ml = hc_sent$bin_score_mls)),2)
# full text paragraph sentiment (bottom table) 
table(hc = hc_sent$bin_score, ml = hc_sent$bin_score_mlf)
round((table(hc = hc_sent$bin_score, ml = hc_sent$bin_score_mlf)[1,1] + table(hc = hc_sent$bin_score, ml = hc_sent$bin_score_mlf)[2,2] + table(hc = hc_sent$bin_score, ml = hc_sent$bin_score_mlf)[3,3])/sum(table(hc = hc_sent$bin_score, ml = hc_sent$bin_score_mlf)),2)

# Table 2 ------------------------------------------------------------------- #

# read in the text needed for table 2
table2_text <- read_csv('table2_text.csv')

# merge with paragraph-level scores
table2_text %>% 
    left_join(ref_df %>% select(para_id, president, comb_sent)) %>% 
    select(president, text, ai_snip, comb_sent) %>% 
    mutate(comb_sent = round(comb_sent, 2)) %>% 
    kable('simple')

# Figure 1 ------------------------------------------------------------------ # 

sent_ridge <- ggplot(ref_df %>% left_join(pres_table), 
        aes(x = comb_sent, y = reorder(plot_pres, 15 - ord), 
            fill = as.factor(ifelse(all_out_ct > 0, 1, 0)))) + 
    stat_density_ridges(alpha = 0.7, rel_min_height = 0.01, quantile_lines = TRUE, quantiles = 2) +
    theme_baselike() +
    theme(legend.direction = 'horizontal', legend.position = 'bottom') +
    labs(x = 'More Negative Sentiment — More Positive Sentiment', y = '', fill = 'Reference') +
    scale_fill_manual(values = c('grey70', 'grey30'), labels = c('Not Out-Party', 'Out-Party')) + 
    coord_cartesian(xlim = c(-.05, 1.05), ylim = c(1.4,16)) 

# Table 3 ------------------------------------------------------------------- #

insttime_nofe <- feols(tot_out_1k ~ time + div + major_elex + pparty +
    war_days + pres_app + f100 + pterm | month, ref_doc_lev, vcov = 'iid')
fe <- feols(tot_out_1k ~ div + major_elex + war_days + pres_app + f100 + pterm | 
    president + month, ref_doc_lev, vcov = 'iid')
fe_eras <- feols(tot_out_1k ~ time3 + div + major_elex + pparty + war_days + 
    pres_app + f100 + pterm | month, ref_doc_lev, vcov = 'iid')

c_map <- c(
    'time' = 'Majority Competition (80--84th, 97--118th)',
    'div' = 'Divided Government',
    'major_elex' = 'Major Election Season',
    'time3early' = 'Truman Period (80th--84th)',
    'time3late' = 'Modern Period (97th--118th)',
    'ppartyR' = 'Republican',
    'pres_app' = 'Presidential Approval',
    'war_days' = 'Major War',
    'f100' = 'First 100 Days',
    'pterm' = 'Term'
)

modelsummary(models = list(insttime_nofe, fe, fe_eras),
    coef_map = c_map,
    stars = TRUE,
    gof_omit = 'AIC|BIC|FE|Std.',
    output = 'markdown', 
    escape = FALSE)

# Table B1 ------------------------------------------------------------------ #

insttime_sent_nofe <- feols(comb_sent ~ tot_out_1k_para * (time + div + major_elex) + pparty + war_days + pres_app + f100 + pterm | 
    month, ref_df, cluster = ~doc_id)
sent_fe <- feols(comb_sent ~ tot_out_1k_para * (div + major_elex) + war_days + pres_app + f100 + pterm | 
    president + month, ref_df, cluster = ~doc_id)
sent_fe_full <- feols(sentiment_score_f ~ tot_out_1k_para * (div + major_elex) + war_days + pres_app + f100 + pterm | 
    president + month, ref_df, cluster = ~doc_id)
ref_type_sent <- feols(comb_sent ~ out_party_1k_para * (div + major_elex) + war_days + pres_app + f100 + pterm | 
    president + month, ref_df, cluster = ~doc_id)
time3_sent <- feols(comb_sent ~ tot_out_1k_para * (time3 + div + major_elex) + pparty + war_days + pres_app + f100 + pterm | 
    month, ref_df, cluster = ~doc_id)

c_map_s <- c(
    'tot_out_1k_para' = 'Out-Party Ref. per 1,000 Words',
    'out_only_ref' = 'Out-Party References Only (paragraph does not reference in-party)',
    'out_party_1k_para' = 'Party Refs. Only',

    'div_pres' = 'Institutional Variation',
    'tot_out_1k_para:div_pres' = 'Out-Party Ref. x Institutional Variation',
    'time' = 'Majority Competition (80--84th, 97--118th)',
    'tot_out_1k_para:time' = 'Out-Party Ref. x Majority Competition',
    'div' = 'Divided Government',
    'tot_out_1k_para:div' = 'Out-Party Ref. x Divided Government',
    'major_elex' = 'Major Election Season',
    'tot_out_1k_para:major_elex' = 'Out-Party Ref. x Major Election',

    'out_only_ref:div' = 'Out-Party References Only x Divided Government',            
    'out_only_ref:major_elex' = 'Out-Party References Only x Major Elections',

    'out_party_1k_para:div' = 'Party Refs. Only x Divided Government',            
    'out_party_1k_para:major_elex' = 'Party Refs. Only x Major Elections',

    'time3early' = 'Truman Period (80th--84th)',
    'time3late' = 'Modern Period (97th--118th)',
    'tot_out_1k_para:time3early' = 'Out-Party Ref. per 1,000 Words x Truman Period',
    'tot_out_1k_para:time3late' = 'Out-Party Ref. per 1,000 Words x Modern Period',

    'ppartyR' = 'Republican',
    'pres_app' = 'Presidential Approval',
    'war_days' = 'Major War',
    'f100' = 'First 100 Days',
    'pterm' = 'Term'
)

modelsummary(models = list(insttime_sent_nofe, sent_fe, sent_fe_full, 
        ref_type_sent, time3_sent),
    coef_map = c_map_s,
    stars = TRUE,
    gof_omit = 'AIC|BIC|FE|Std.',
    output = 'markdown',
    escape = FALSE)

# Figure 2 ------------------------------------------------------------------ #

# create marginal effects
divp_me <- slopes(insttime_sent_nofe, variables = 'tot_out_1k_para', newdata = datagrid(time = c(0, 1)))
div_me <- slopes(sent_fe, variables = 'tot_out_1k_para', newdata = datagrid(div = c(0, 1)))
elex_me <- slopes(sent_fe, variables = 'tot_out_1k_para', newdata = datagrid(major_elex = c(0, 1)))

# build plot
all_mes <- bind_rows(
        divp_me %>% mutate(var = 'Congressional Competition'),
        div_me %>% mutate(var = 'Divided Government'),
        elex_me %>% mutate(var = 'Major Election')
    ) %>% mutate(x_var = case_when(
        time == 0 & var == 'Congressional Competition' ~ 'Less\nCongressional\nCompetition',
        time == 1 & var == 'Congressional Competition' ~ 'More\nCongressional\nCompetition',
        div == 0 & var == 'Divided Government' ~ 'Unified',
        div == 1 & var == 'Divided Government' ~ 'Divided',
        major_elex == 1 & var == 'Major Election' ~ 'Election Period',
        major_elex == 0 & var == 'Major Election' ~ 'Non-Election Period'),
    x_var = factor(x_var, levels = c('Less\nCongressional\nCompetition', 'More\nCongressional\nCompetition',
        'Unified', 'Divided', 'Election Period', 'Non-Election Period')),
    var = factor(var, levels = c('Congressional Competition', 'Divided Government', 'Major Election')))

all_me_plot <- ggplot(all_mes, aes(x = x_var, y = estimate)) + 
     geom_point() + 
     geom_errorbar(aes(x = x_var, ymin = conf.low, ymax = conf.high), width = 0) +
     facet_wrap(~var, scales = 'free_x') + 
     geom_hline(yintercept = 0, linetype = 'dashed') + 
     theme_baselike() +
     labs(x = '', y = 'Marginal Effect of Additional Out-Party Reference\n(per 1,000 Words) on Paragraph Sentiment')

# Appendix B.1 Bootstrap ---------------------------------------------------- #

# bootstrap tests of different periods
# actual differences
est_time3 <- slopes(time3_sent, variables = 'time3', newdata = datagrid(tot_out_1k_para = c(0, 1)))

# boostrap truman and modern period differences compared to non-competitive periods
early_bs <- late_bs <- c()
set.seed(20250322)
for(i in 1:500){

    doc_ids_bs <- sample(unique(ref_df$doc_id), length(unique(ref_df$doc_id)), replace = TRUE)
    bs_df <- tibble(doc_id = doc_ids_bs) %>% left_join(ref_df)


    time3_sent_bs <- feols(comb_sent ~ tot_out_1k_para * (time3 + div + major_elex) + pparty + war_days + pres_app + f100 + pterm | 
    month, bs_df, cluster = ~doc_id)

    time3_me <- slopes(time3_sent_bs, variables = 'time3', newdata = datagrid(tot_out_1k_para = c(0, 1)))

    early_bs <- c(early_bs, time3_me$estimate[2] - time3_me$estimate[1])
    late_bs <- c(late_bs, time3_me$estimate[4] - time3_me$estimate[3])
}

# truman period bootstrap difference
round(est_time3$estimate[2] - est_time3$estimate[1],4)
round(quantile(early_bs, c(.025, .975)),4)
# modern period bootstrap difference
round(est_time3$estimate[4] - est_time3$estimate[3],4)
round(quantile(late_bs, c(.025,.975)),4)

# Table B2 ------------------------------------------------------------------ #

# percentage of each reference type
sum(ref_df %>% pull(out_party_ct))/(sum(ref_df %>% filter(all_out_ct > 0) %>% pull(all_out_ct)))
sum(ref_df %>% pull(out_pres_ct))/(sum(ref_df %>% filter(all_out_ct > 0) %>% pull(all_out_ct)))
sum(ref_df %>% pull(out_leader_ct))/(sum(ref_df %>% filter(all_out_ct > 0) %>% pull(all_out_ct)))

party_ct <- feols(I((out_party_ct/tot_wc) * 1000) ~ time3 + div + major_elex + 
    pparty + war_days + pres_app + f100 + pterm  | month, ref_doc_lev, vcov = 'iid')
pres_ct <- feols(I((out_pres_ct/tot_wc) * 1000) ~ time3 + div + major_elex + 
    pparty + war_days + pres_app + f100 + pterm | month, ref_doc_lev, vcov = 'iid')
lead_ct <- feols(I((out_leader_ct/tot_wc) * 1000) ~ time3 + div + major_elex +
    pparty + war_days + pres_app + f100 + pterm | month, ref_doc_lev, vcov = 'iid')

party_sent <- feols(comb_sent ~ I((out_party_ct/wc) * 1000) + time3 + div + 
    major_elex + pparty + war_days + pres_app + f100 + pterm | month, ref_df, 
    cluster = ~doc_id)
pres_sent <- feols(comb_sent ~ I((out_pres_ct/wc) * 1000) + time3 + div + 
    major_elex + pparty + war_days + pres_app + f100 + pterm | month, ref_df, 
    cluster = ~doc_id)
lead_sent <- feols(comb_sent ~ I((out_leader_ct/wc) * 1000) + time3 + div + 
    major_elex + pparty + war_days + pres_app + f100 + pterm | month, ref_df, 
    cluster = ~doc_id)

c_map_ref3 <- c(c('I((out_party_ct/wc) * 1000)' = 'Opp. Reference', 
        'I((out_pres_ct/wc) * 1000)' = 'Opp. Reference', 
        'I((out_leader_ct/wc) * 1000)' = 'Opp. Reference'), 
    c_map[4:5], c_map[1:3], c_map[6:length(c_map)])

modelsummary(models = list(party_ct, party_sent, pres_ct, pres_sent, lead_ct, lead_sent),
    coef_map = c_map_ref3,
    stars = TRUE,
    gof_omit = 'AIC|BIC|FE|Std.',
    output = 'markdown',
    escape = FALSE)

# Table B3 and Figure 3 ----------------------------------------------------- #

# 111th congress, document level
ob_df <- ref_doc_lev %>% 
    filter(congress == 111) %>% 
    # create variable for each period
    mutate(fp_eras = case_when(
            day < as_date('2009-07-01') ~ 'pre-period',
            day > as_date('2010-01-19') ~ 'post-period',
            TRUE ~ 'fp_maj'),
        fp_eras_factor = factor(fp_eras, c('fp_maj', 'pre-period', 'post-period'))) 

ob_eras_ct <- feols(tot_out_1k ~ fp_eras_factor + pres_app + major_elex + f100, 
    ob_df, vcov = 'iid')
ob_ct_pred <- predictions(ob_eras_ct, newdata = 
    datagrid(fp_eras_factor = c('fp_maj', 'pre-period', 'post-period')))

# 111th congress, paragraph level
ob_df_para <- ref_df %>% 
    filter(congress == 111) %>% 
    mutate(fp_eras = case_when(
            day < as_date('2009-07-01') ~ 'pre-period',
            day > as_date('2010-01-19') ~ 'post-period',
            TRUE ~ 'fp_maj'),
        fp_eras_factor = factor(fp_eras, c('fp_maj', 'pre-period', 'post-period')))

ob_eras_sent <- feols(comb_sent ~ tot_out_1k_para * fp_eras_factor + pres_app + 
    major_elex + f100, ob_df_para, cluster = ~doc_id)
ob_sent_me <- slopes(ob_eras_sent, variables = 'tot_out_1k_para', 
    newdata = datagrid(fp_eras_factor = c('pre-period', 'fp_maj', 'post-period')))

# plot coefficients
plot_coefs <- bind_rows(ob_ct_pred %>% mutate(title = 'Predicted Number of\nReferences per 1,000 Words'), 
    ob_sent_me %>% 
    mutate(title = 'Marginal Effect of Out-Party References\non Sentiment of Paragraphs')) %>% 
    mutate(title = factor(title, levels = c('Predicted Number of\nReferences per 1,000 Words', 
        'Marginal Effect of Out-Party References\non Sentiment of Paragraphs')),
        fp_eras_factor = factor(fp_eras_factor, levels = c('pre-period', 
            'fp_maj', 'post-period')))

c_map_ob <- c(
    'fp_eras_factorpost-period' = 'After Super-Majority',
    'fp_eras_factorpre-period' = 'Before Super-Majority',
    'tot_out_1k_para' = 'Out-Party Ref. per 1,000 Words',
    'tot_out_1k_para:fp_eras_factorpost-period' = 'Out-Party Ref. x After Super-Majority',
    'tot_out_1k_para:fp_eras_factorpre-period' = 'Out-Party Ref. x Before Super-Majority',
    'major_elex' = 'Major Election Season',
    'pres_app' = 'Presidential Approval',
    'f100' = 'First 100 Days',
    '(Intercept)' = 'Intercept'
)

modelsummary(models = list(ob_eras_ct, ob_eras_sent),
    coef_map = c_map_ob,
    stars = TRUE,
    gof_omit = 'AIC|BIC|FE|Std.',
    output = 'markdown',
    escape = FALSE)

ob_sent <- ggplot(plot_coefs, aes(x = fp_eras_factor, y = estimate)) + 
    facet_wrap(~title, scales = 'free') +
    geom_point() + 
    geom_hline(yintercept = 0, linetype = 'dashed') +
    geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0) +
    theme_baselike() + 
    scale_x_discrete(labels = c('Before\nSuper-Majority', 'During\nSuper-Majority', 'After\nSuper-Majority')) + 
    labs(y = '', x = '')

# Figure 4 ------------------------------------------------------------------ #

# create speech type variables and counts
rally_df <- ref_doc_lev %>% 
    mutate(
        speech3 = case_when(speech_type == 'rally' ~ 'Rally',
            speech_type == 'address' ~ 'Major Address',
            TRUE ~ 'All Others'),
        speech3 = factor(speech3, levels = c('Major Address', 'All Others', 'Rally')),
        cat_outref1k = case_when(
            tot_out_1k == 0 ~ '0',
            tot_out_1k > 0 & tot_out_1k <= 1 ~ '(0,1]',
            tot_out_1k > 1 & tot_out_1k <= 2 ~ '(1,2]',
            tot_out_1k > 1 & tot_out_1k > 2 ~ '2+'),
        cat_outref1k = factor(cat_outref1k, levels = c('0', '(0,1]', '(1,2]', '2+'))
    ) 

# aggregate speech types
rally_df_summarized <- rally_df %>%
  group_by(speech3, cat_outref1k) %>%
  summarize(n = n()) %>%
  mutate(prop = n / sum(n),
    speech3 = factor(speech3, levels = c('Major Address', 'All Others', 'Rally')))

rally_plot <- ggplot(rally_df_summarized, aes(x = cat_outref1k, y = prop, fill = as.factor(speech3))) + 
    geom_col(position = "dodge") +
    theme_baselike() + 
    scale_fill_manual(values = c('grey80', 'grey50','grey30')) + 
    labs(x = "References per 1,000 Words", y = "Proportion of Speeches", fill = "Speech Type") +
    theme(legend.position = 'bottom', legend.direction = 'horizontal')

# Table C1 ------------------------------------------------------------------ #

insttime_nofe_rally <- feols(tot_out_1k ~ speech3 + time + div + major_elex + 
    pparty + war_days + pres_app + f100 + pterm | month, rally_df, vcov = 'iid')
fe_rally <- feols(tot_out_1k ~ speech3 + div + major_elex + war_days + 
    pres_app + f100 + pterm | president + month, rally_df, vcov = 'iid')

c_map_s3 <- c('speech3Rally' = 'Rally', 'speech3All Others' = 'Other Speech Types', c_map)

modelsummary(models = list(insttime_nofe_rally, fe_rally),
    coef_map = c_map_s3,
    stars = TRUE,
    gof_omit = 'AIC|BIC|FE|Std.',
    output = 'markdown',
    escape = FALSE)

# Figure 5 ------------------------------------------------------------------ #

ref_sum <- ref_df %>% 
    filter(any_out_ref == 1) %>% 
    mutate(major_topic = str_to_title(major_topic),
        major_topic = if_else(major_topic %in% c('Junk', 'Functions'), 'Functional', major_topic)) %>% 
    group_by(major_topic) %>% 
    summarise(n = n()) %>% 
    mutate(prop = n / sum(n))

topic_plot <- ggplot(ref_sum, aes(x = reorder(major_topic, -prop), y = prop)) +
    geom_bar(stat = 'identity') +
    theme_baselike() + 
    scale_y_continuous(expand = c(0,0)) + 
    coord_cartesian(ylim = c(0, .3)) + 
    theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1), legend.position = 'bottom', legend.direction = 'horizontal') + 
    labs(x = 'Major Topic', y = 'Proportion of Out-Party References')

# Table C2 ------------------------------------------------------------------ #

io_topicfe <- feols(tot_out_1k_para ~ owned_issue * div + time + major_elex + 
    pparty + war_days + pres_app + f100 + pterm | month + max_topic, ref_df, 
    cluster = ~doc_id)
io_presfe <- feols(tot_out_1k_para ~ owned_issue * div + major_elex + war_days + 
    pres_app + f100 + pterm | month + president, ref_df, cluster = ~doc_id)

c_map_top <- c('owned_issue' = 'Owned Issue', 'owned_issue:div' = 'Owned Issue x Divided Government', c_map)

modelsummary(models = list(io_topicfe, io_presfe),
    coef_map = c_map_top,
    stars = TRUE,
    gof_omit = 'AIC|BIC|FE|Std.',
    output = 'markdown',
    escape = FALSE)

# Table C3 ------------------------------------------------------------------ #

inct_nofe <- feols(ref_diff_1k ~ (time + div + major_elex) + pparty + war_days + 
    pres_app + f100 + pterm | month, ref_doc_lev, cluster = ~doc_id)
inct_fe <- feols(ref_diff_1k ~ (div + major_elex) + pparty + war_days + 
    pres_app + f100 + pterm | president + month, ref_doc_lev, cluster = ~doc_id)

c_map_inp <- c('tot_in_1k_para' = 'In-Party Ref. per 1,000 Words', 
    'tot_in_1k_para:time' = 'In-Party Ref. x Majority Competition', 
    'tot_in_1k_para:div' = 'In-Party Ref. x Divided Government',
     'tot_in_1k_para:major_elex' = 'In-Party Ref. x Major Election',
    c_map_s)    

modelsummary(models = list(inct_nofe, inct_fe),
    coef_map = c_map_inp,
    stars = TRUE,
    gof_omit = 'AIC|BIC|FE|Std.',
    output = 'markdown',
    escape = FALSE)

# Merge TAPS Survey --------------------------------------------------------- #

# extract necessary covariates
ref_covars <- ref_df %>% 
    filter((year %in% 2012:2017) | (year == 2011 & month == 12)) %>% 
    filter(!(year == 2017 & month %in% c(1,2))) %>% 
    group_by(year, month, pparty, president) %>% 
    summarise(avg_app = mean(pres_app), pterm = min(pterm))

# lag presidential negative partisanship
ref_avg_lags <- ref_df %>%
    filter((year %in% 2012:2017) | (year == 2011 & month == 12)) %>% 
    group_by(year, month, any_out_ref, pparty) %>% 
    summarise(avg_sent = mean(comb_sent, na.rm = TRUE)) %>% 
    pivot_wider(names_from = 'any_out_ref', values_from = 'avg_sent') %>% 
    rename(out_ref = `1`, non_ref = `0`) %>% 
    ungroup() %>% 
    mutate(rev_out_sent = 1 - out_ref,
        rev_out_sent_l1 = lag(rev_out_sent),
        non_ref_sent_l1 = lag(non_ref),
        elex_month = if_else(month %in% 9:11 & year %in% c(2012, 2014, 2016), 1, 0))

# merge lagged data and covariates
ref_avg <- left_join(ref_avg_lags, ref_covars)

# merge survey data with presidency data
merged_df <- left_join(taps_merge, ref_avg) %>% 
    filter(month != 0) %>% 
    mutate(pres_co = case_when(
        (pparty == 'D' & resp_pid3 == 'D') | (pparty == 'R' & resp_pid3 == 'R') ~ 1,
        (pparty == 'D' & resp_pid3 == 'R') | (pparty == 'R' & resp_pid3 == 'D') ~ 0),
        my = if_else(month >= 10, paste(year, month, sep = '-'), paste(year, paste0(0, month), sep = '-'))) %>% 
        filter(!(year == 2017 & month %in% c(1,2))) 

# Figure D1 ----------------------------------------------------------------- #

agg_rate <- merged_df %>% 
    filter(pres_co == 1) %>% 
    group_by(year, month, pparty, my) %>% 
    summarise(avg_sent = mean(rev_out_sent_l1, na.rm = TRUE), 
        avg_out_app = mean(out_rate, na.rm = TRUE)) 

app_plot <- ggplot(agg_rate, aes(x = avg_sent, y = avg_out_app)) +
  geom_smooth(method = 'lm', color = 'grey50', size = 1, se = FALSE) + 
  geom_point(aes(color = pparty), size = 2, alpha = 0.7) + 
  geom_text_repel(aes(label = my, color = pparty), 
                  max.overlaps = 10, size = 3, 
                  fontface = 'bold', nudge_y = 0.02, seed = 42) +  
  theme_baselike() + 
  labs(x = 'Average Monthly Out-Party Negative Sentiment (Higher, More Negative)', 
    y = 'Average Out-Party Approval',
    color = '') +
  scale_color_manual(values = c(ggblue, ggred), labels = c('Obama', 'Trump')) +
  theme(legend.position = 'bottom', legend.direction = 'horizontal')

# Table D1 ------------------------------------------------------------------ # 

out_app_int <- feols(out_rate ~ rev_out_sent_l1 * pres_co + non_ref_sent_l1 + 
    elex_month + avg_app + pterm | WUSTLID + president, merged_df, 
    cluster = ~WUSTLID + my)
in_app_int <- feols(in_rate ~ rev_out_sent_l1 * pres_co + non_ref_sent_l1 + 
    elex_month + avg_app + pterm | WUSTLID + president, merged_df, 
    cluster = ~WUSTLID + my)

c_map_app <- c(
    'rev_out_sent_l1' = 'Lagged Out-Party Sentiment (Reverse Coded)',
    'pres_co' = 'Presidential Co-partisan',
    'rev_out_sent_l1:pres_co' = 'Lagged Sentiment x Presidential Co-Partisan',
    'non_ref_sent_l1' = 'Lagged Non-Reference Sentiment',
    'elex_month' = 'Election Season',
    'avg_app' = 'Average Approval',
    'pterm' = 'Term'
)

modelsummary(models = list(out_app_int, in_app_int),
    coef_map = c_map_app,
    stars = TRUE,
    gof_omit = 'AIC|BIC|FE|Std.',
    output = 'markdown',
    escape = FALSE)

# Figure 6 ------------------------------------------------------------------ # 

# create marginal effects
app_me_plot_out <- slopes(out_app_int, variables = 'rev_out_sent_l1', 
        newdata = datagrid(pres_co = c(0,1))) %>% 
    mutate(pres_co = if_else(pres_co == 1, 'Presidential Co-Partisan', 'Not Co-Partisan'),
        pres_co = factor(pres_co, levels = c('Presidential Co-Partisan', 'Not Co-Partisan')),
        lab = 'Out-Party Approval')

app_me_plot_in <- slopes(in_app_int, variables = 'rev_out_sent_l1', 
    newdata = datagrid(pres_co = c(0,1))) %>% 
    mutate(pres_co = if_else(pres_co == 1, 'Presidential Co-Partisan', 'Not Co-Partisan'),
        pres_co = factor(pres_co, levels = c('Presidential Co-Partisan', 'Not Co-Partisan')),
        lab = 'Own-Party Approval')

# merge marginal effects
app_me_plot <- bind_rows(app_me_plot_out, app_me_plot_in)

app_me <- ggplot(app_me_plot, aes(x = pres_co, y = estimate)) + 
    geom_point() + 
    geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width = 0) + 
    geom_hline(yintercept = 0, linetype = 'dashed') +
    theme_baselike() +
    facet_wrap(~lab) +
    labs(x = '', y = 'Marginal Effect of Presidential\nNegative Out-Party Rhetoric')

